library(foreign)
library(Zelig)
library(xtable)
library(survey)
library(WhatIf)
library(MatchIt)
library(cem)
library(Amelia)

whofights <- read.dta("RECRUIT_SIMPLE.dta")

###########SOLDIERS AND SCHOLARS REPLICATION CODE
##1. replication
##2. multiple imputation (amelia)
##3. Testing Model Dependence - Simulating Quantities of Interest
##4. Testing Counterfactual Support in the convex hull
##5. Matching on Education; Logit Regression on Matched Data; Simulating Quantities of Interest
##6. QI's for future RUF comparison (includes matching on Mud)
##7. Repeat for RUF Model (skipping steps 3 and 4)


#########1. REPLICATION OF CDF MODEL
####################################

CDF_VARS <- c("CDF", "MUD", "EDUCATION", "SLPP", "Mende", "NOPARTY", "CDFMONEY", "CDFsafer", "CDFFRIENDS", "pctvillaccessfootboat", "FARMER", "STUDENT", "gender", "age2", "age2sq", "FREETOWN", "INF_MORT", "pWEIGHT", "EXC_SURV_LOC")

wfCDF <- whofights[!is.na(whofights$CDF), ]
wfCDF <- wfCDF[!(wfCDF$EXCOM==1 & wfCDF$CDF == 0),]
wfCDF <- wfCDF[!(wfCDF$reason_abducted=="YES" & wfCDF$CDF == 1),]
wfCDF <- na.omit(wfCDF[,CDF_VARS])
wfCDF <- wfCDF[order(-wfCDF$EXC_SURV_LOC),]

zCDF <- zelig(CDF ~ MUD + EDUCATION + SLPP + Mende + NOPARTY + CDFMONEY + CDFsafer + CDFFRIENDS + pctvillaccessfootboat + FARMER + STUDENT + gender + age2 + age2sq + FREETOWN + INF_MORT, model = "logit.survey", weights = wfCDF$pWEIGHT, ids = wfCDF$EXC_SURV_LOC, data = wfCDF)

xtable(x = summary(zCDF)$coef, caption = "CDF: Replicated Results")

#zCDFmudMoney <- zelig(CDF ~ MUD + EDUCATION + SLPP + Mende + NOPARTY + CDFMONEY + CDFsafer + CDFFRIENDS + pctvillaccessfootboat + FARMER + STUDENT + gender + age2 + age2sq + FREETOWN + INF_MORT + MUD*CDFMONEY, model = "logit.survey", weights = wfCDF$pWEIGHT, ids = wfCDF$EXC_SURV_LOC, data = wfCDF)

#zCDFmudSafety <- zelig(CDF ~ MUD + EDUCATION + SLPP + Mende + NOPARTY + CDFMONEY + CDFsafer + CDFFRIENDS + pctvillaccessfootboat + FARMER + STUDENT + gender + age2 + age2sq + FREETOWN + INF_MORT + MUD*CDFsafer, model = "logit.survey", weights = wfCDF$pWEIGHT, ids = wfCDF$EXC_SURV_LOC, data = wfCDF)

#zCDFeduMoney <- zelig(CDF ~ MUD + EDUCATION + SLPP + Mende + NOPARTY + CDFMONEY + CDFsafer + CDFFRIENDS + pctvillaccessfootboat + FARMER + STUDENT + gender + age2 + age2sq + FREETOWN + INF_MORT + EDUCATION*CDFMONEY, model = "logit.survey", weights = wfCDF$pWEIGHT, ids = wfCDF$EXC_SURV_LOC, data = wfCDF)

#zCDFeduSafety <- zelig(CDF ~ MUD + EDUCATION + SLPP + Mende + NOPARTY + CDFMONEY + CDFsafer + CDFFRIENDS + pctvillaccessfootboat + FARMER + STUDENT + gender + age2 + age2sq + FREETOWN + INF_MORT + EDUCATION*CDFsafer, model = "logit.survey", weights = wfCDF$pWEIGHT, ids = wfCDF$EXC_SURV_LOC, data = wfCDF)

#########2. Multiple Imputation (Amelia)
########################################

a.cdf <- whofights[!(whofights$EXCOM==1 & whofights$CDF == 0),]
a.cdf <- a.cdf[!is.na(a.cdf$reason_abducted),]
a.cdf <- a.cdf[!is.na(a.cdf$EXC_SURV_LOC),]
a.cdf <- a.cdf[!(a.cdf$reason_abducted=="YES" & a.cdf$CDF == 1),]
a.cdf <- a.cdf[,CDF_VARS]
a.cdf$EDUCATION <- a.cdf$EDUCATION +1

set.seed(123456)

a.cdf.out <- amelia(x = a.cdf, m = 5, p2s = 1, noms = c("CDF", "MUD", "SLPP", "Mende", "NOPARTY", "CDFFRIENDS", "FARMER", "STUDENT", "gender", "FREETOWN", "CDFMONEY"), ords = c("pctvillaccessfootboat", "CDFsafer", "EDUCATION"))
a.cdf.data <- a.cdf.out$imputations[1:5]

#set edu back to 0,1,2
for (i in 1:5) {
	tdata <- a.cdf.data[[i]]
	tdata$EDUCATION <- tdata$EDUCATION-1
	a.cdf.data[[i]] <- tdata
	}

z.aCDF <- zelig(CDF ~ MUD + EDUCATION + SLPP + Mende + NOPARTY + CDFMONEY + CDFsafer + CDFFRIENDS + pctvillaccessfootboat + FARMER + STUDENT + gender + age2 + age2sq + FREETOWN + INF_MORT, model = "logit.survey", weights = a.cdf$pWEIGHT, ids = a.cdf$EXC_SURV_LOC, data = mi(a.cdf.data[[1]], a.cdf.data[[2]], a.cdf.data[[3]], a.cdf.data[[4]], a.cdf.data[[5]]))

xtable(x = summary(z.aCDF)$coef, caption = "CDF: Multiply Imputed Results")


#########3. Testing Model Dependence - Simulating Quantities of Interest
########################################################################

#CDFMONEY as treatment on full model (with imputed data)
x0.out <- setx(z.aCDF, CDFMONEY = 0)
x1.out <- setx(z.aCDF, CDFMONEY = 1)
s.money <- sim(z.aCDF, x = x0.out, x1 = x1.out)

#Removing "Safer" from the model
z.safer <- zelig(CDF ~ MUD + EDUCATION + SLPP + Mende + NOPARTY + CDFMONEY + CDFFRIENDS + pctvillaccessfootboat + FARMER + STUDENT + gender + age2 + age2sq + FREETOWN + INF_MORT, model = "logit.survey", weights = a.cdf$pWEIGHT, ids = a.cdf$EXC_SURV_LOC, data = mi(a.cdf.data[[1]], a.cdf.data[[2]], a.cdf.data[[3]], a.cdf.data[[4]], a.cdf.data[[5]]))

#CDFMONEY as treatment on model without safer
x0.out <- setx(z.safer, CDFMONEY = 0)
x1.out <- setx(z.safer, CDFMONEY = 1)
s.safer <- sim(z.safer, x = x0.out, x1 = x1.out)

#adding interaction term: farmer and CDFmoney

z.cdf.fm <- zelig(CDF ~ MUD + EDUCATION + SLPP + Mende + NOPARTY + CDFMONEY + CDFsafer + CDFFRIENDS + pctvillaccessfootboat + FARMER + STUDENT + gender + age2 + age2sq + FREETOWN + INF_MORT + FARMER * CDFMONEY, model = "logit.survey", weights = a.cdf$pWEIGHT, ids = a.cdf$EXC_SURV_LOC, data = mi(a.cdf.data[[1]], a.cdf.data[[2]], a.cdf.data[[3]], a.cdf.data[[4]], a.cdf.data[[5]]))

x0.out <- setx(z.cdf.fm, CDFMONEY = 0)
x1.out <- setx(z.cdf.fm, CDFMONEY = 1)
s.fm <- sim(z.cdf.fm, x = x0.out, x1 = x1.out)

#summary of model qi's
summary(s.money)
summary(s.safer)
summary(s.fm)

#create the model dependence plot
pdf(file = "modelDepend.pdf", height = 5, width = 5)
plot(density(s.money$qi$rr), xlim = c(0,60), ylim = c(0, .35), lty = 1, main = "Difference in Risk Ratios by Model", xlab = "P(CDF = 1|Money = 1)/P(CDF = 1|Money = 0)", ylab = "Density")
lines(density(s.safer$qi$rr), lty = 2)
lines(density(s.fm$qi$rr), lty = 3)
legend(x = "topright", legend = c("Imputed", "'Safer' Removed", "Interaction Added"), lty = c(1,2,3))
dev.off()

#########4. Testing Counterfactual Support in the convex hull
##############################################################

edu.wi <- list()
for (i in 1:5) {
	edu.hull <- a.cdf.data[[i]]
	edu.hull$EXC_SURV_LOC <- NULL; edu.hull$pWEIGHT <- NULL
	edu.reg <- edu.hull
	edu.hull[edu.hull$EDUCATION != 2,]$EDUCATION <- ifelse (edu.hull[edu.hull$EDUCATION != 2,]$EDUCATION == 3,1,3)
	edu.wi[[i]] <- whatif(data = edu.reg, cfact = as.matrix(edu.hull))
	} 
#265/729 for each

money.hull <- a.cdf.data[[1]]
money.hull$EXC_SURV_LOC <- NULL; money.hull$pWEIGHT <- NULL
money.reg <- money.hull
money.hull$CDFMONEY <- ifelse(money.hull$CDFMONEY == 1,0,1)
money.wi <- whatif(data = money.reg, cfact = as.matrix(money.hull))
#0/729

mud.hull <- a.cdf.data[[1]]
mud.hull$EXC_SURV_LOC <- NULL; mud.hull$pWEIGHT <- NULL
mud.reg <- mud.hull
mud.hull$MUD <- ifelse(mud.hull$MUD == 1,0,1)
mud.wi <- whatif(data = mud.reg, cfact = as.matrix(mud.hull))
#6/729

#(repeat for other variables if you like)

#education has the best counterfactual coverage, so we will procede with that one as causal variable


#########5. Matching on Education; Logit Regression on Matched Data; Simulating Quantities of Interest
#########################################################################################################

#remove women
a.cdf.men <- list()
for (i in 1:5) {
	tdata <- a.cdf.data[[i]]
	men <- tdata$gender==1
	tdata.men <- tdata[men,]
	tdata.men$gender <- NULL
	a.cdf.men[[i]] <- tdata.men
	}


#remove money==1
a.cdf.mm <- list()
for (i in 1:5) {
	tdata <- a.cdf.men[[i]]
	money <- tdata$CDFMONEY==0
	tdata.money <- tdata[money,]
	tdata.money$CDFMONEY <- NULL
	a.cdf.mm[[i]] <- tdata.money
	}


#note ratio of weighted CDF = 1 and weighted CDF = 0 in no women, no money dataset
w0 <- c()
w1 <- c()
for (i in 1:5) {
	tdata <- a.cdf.mm[[i]]
	w0[i] <- sum(tdata[tdata$CDF==0, "pWEIGHT"])
	w1[i] <- sum(tdata[tdata$CDF==1, "pWEIGHT"])
	}

w0/(w0 + w1) #~98.5%

agecut <- c(18, 21, 25, 30, 35, 40, 45, 50, 60, 70, 80)
infmcut <- c(0.17, 0.18, 0.19, 0.21, 0.24, 0.6)
pctvcut <- c(10, 20, 30, 40, 50, 60, 70, 101)

match.mm <- cem(treatment = "EDUCATION", datalist = a.cdf.mm, drop = c("CDF", "age2sq", "CDFsafer", "EXC_SURV_LOC", "pWEIGHT"), cutpoints = list(age2 = agecut, INF_MORT = infmcut, pctvillaccessfootboat = pctvcut), eval.imbalance = FALSE, verbose=3)

###note that most observations for CDFFRIENDS==1 are dropped

#matched data with cem weights
mdata <- list()
for (i in 1:5) {
	tdata <- a.cdf.mm[[i]]
	tdata <- tdata[match.mm[[i]]$matched,]
	cemweight <- match.mm[[i]]$w[match.mm[[i]]$matched]
	mdata[[i]] <- cbind(tdata, cemweight)
	}

#re-weight matched data
mdata.n0 <- c()
mdata.n1 <- c()
for (i in 1:5) {
	tdata <- mdata[[i]]
	mdata.n0[i] <- nrow(tdata[tdata$CDF==0,])
	mdata.n1[i] <- nrow(tdata[tdata$CDF==1,])
	}

rw0 <- 98.5 * mdata.n1 / mdata.n0 #new weights

for (i in 1:5){
	tdata <- mdata[[i]]
	tweight <- ifelse(tdata$CDF==1, 1, rw0[i])
	tweight <- tweight*tdata$cemweight #cem weight times proper ratio of CDF=0/total or CDF=1/total
	mdata[[i]] <- cbind(tdata, tweight)
	}

#fit models
z.mdata <- list()
for (i in 1:5){
	z.mdata[[i]] <- zelig(CDF ~ EDUCATION + pctvillaccessfootboat + age2 + age2sq + INF_MORT, weights = mdata[[i]]$tweight, model = "logit.survey", data = mdata[[i]])
	}

#calculate combined estimates and SE
estMat <- matrix(data = NA, nrow = 5, ncol = 6)
varMat <- matrix(data = NA, nrow = 5, ncol = 6)
for (i in 1:5){
	for (j in 1:ncol(estMat)) {
		estMat[i,j] <- z.mdata[[i]]$coef[j] 
		}
	vc <- vcov(z.mdata[[i]])
	varMat[i,] <- diag(vc)
	}

estVec.cdf.edu <- apply(X = estMat, MARGIN = 2, FUN = mean)
seVec.cdf.edu <- c()
for (i in 1:ncol(varMat)) {
	seVec.cdf.edu[i] <- mean(varMat[,i]) + var(estMat[,i])*(1 + 1/5)
	}
seVec.cdf.edu <- sqrt(seVec.cdf.edu)

estVec.cdf.edu
seVec.cdf.edu

#####full model (sans safer); matched data (for comparison with reduced model)
z.full.cdf.ss <- zelig(CDF ~ MUD + EDUCATION + SLPP + Mende + NOPARTY + CDFFRIENDS + pctvillaccessfootboat + FARMER + STUDENT + age2 + age2sq + FREETOWN + INF_MORT, weights = mdata[[1]]$tweight, model = "logit.survey", data = mdata[[1]])
#####interaction model (sans safer); matched data (for comparison with reduced model)
z.inter.cdf.ss <- zelig(CDF ~ MUD + EDUCATION + SLPP + Mende + NOPARTY + CDFFRIENDS + pctvillaccessfootboat + FARMER + STUDENT + age2 + age2sq + FREETOWN + INF_MORT + farmer*money, weights = mdata[[1]]$tweight, model = "logit.survey", data = mdata[[1]])

summary(z.full.cdf.ss)
summary(z.inter.cdf.ss)


#simulate qi's

x0.mdata <- list()
x1.mdata <- list()
s01.mdata <- list()
for (i in 1:5) {
	x0.mdata[[i]] <- setx(z.mdata[[i]], EDUCATION = 0)
	x1.mdata[[i]] <- setx(z.mdata[[i]], EDUCATION = 1)
	s01.mdata[[i]] <- sim(z.mdata[[i]], x = x0.mdata[[i]], x1 = x1.mdata[[i]])
	}

s12.mdata <- list()
for (i in 1:5) {
	x0.mdata[[i]] <- setx(z.mdata[[i]], EDUCATION = 1)
	x1.mdata[[i]] <- setx(z.mdata[[i]], EDUCATION = 2)
	s12.mdata[[i]] <- sim(z.mdata[[i]], x = x0.mdata[[i]], x1 = x1.mdata[[i]])
	}

s02.mdata <- list()
for (i in 1:5) {
	x0.mdata[[i]] <- setx(z.mdata[[i]], EDUCATION = 0)
	x1.mdata[[i]] <- setx(z.mdata[[i]], EDUCATION = 2)
	s02.mdata[[i]] <- sim(z.mdata[[i]], x = x0.mdata[[i]], x1 = x1.mdata[[i]])
	}

rr01 <- c()
rr12 <- c()
rr02 <- c()
for (i in 1:5) {
	rr01[i] <- mean(s01.mdata[[i]]$qi$rr)
	rr12[i] <- mean(s12.mdata[[i]]$qi$rr)
	rr02[i] <- mean(s02.mdata[[i]]$qi$rr)
	}


pdf(file = "eduRR.pdf", width = 8, height = 5)
par(mfrow = c(1,2))
plot(density(s01.mdata[[1]]$qi$rr), xlim = c(0,200), xlab = "P(CDF = 1|EDU = 1)/P(CDF = 1|EDU = 0)", main = "")
lines(density(s01.mdata[[2]]$qi$rr))
lines(density(s01.mdata[[3]]$qi$rr))
lines(density(s01.mdata[[4]]$qi$rr))
lines(density(s01.mdata[[5]]$qi$rr))
plot(density(s12.mdata[[1]]$qi$rr), ylim = c(0, 0.05), xlab = "P(CDF = 1|EDU = 2)/P(CDF = 1|EDU = 1)", main = "")
lines(density(s12.mdata[[2]]$qi$rr))
lines(density(s12.mdata[[3]]$qi$rr))
lines(density(s12.mdata[[4]]$qi$rr))
lines(density(s12.mdata[[5]]$qi$rr))
dev.off()


x0.edu <- list()
x1.edu <- list()
x2.edu <- list()
s0.edu <- list()
s1.edu <- list()
s2.edu <- list()
for (i in 1:5) {
	x0.edu[[i]] <- setx(z.mdata[[i]], EDUCATION = 0)
	s0.edu[[i]] <- sim(z.mdata[[i]], x = x0.edu[[i]])
	x1.edu[[i]] <- setx(z.mdata[[i]], EDUCATION = 1)
	s1.edu[[i]] <- sim(z.mdata[[i]], x = x1.edu[[i]])
	x2.edu[[i]] <- setx(z.mdata[[i]], EDUCATION = 2)
	s2.edu[[i]] <- sim(z.mdata[[i]], x = x2.edu[[i]])
	}

s0.ev <- rbind(s0.edu[[1]]$qi$ev, s0.edu[[2]]$qi$ev, s0.edu[[3]]$qi$ev, s0.edu[[4]]$qi$ev, s0.edu[[5]]$qi$ev)
s1.ev <- rbind(s1.edu[[1]]$qi$ev, s1.edu[[2]]$qi$ev, s1.edu[[3]]$qi$ev, s1.edu[[4]]$qi$ev, s1.edu[[5]]$qi$ev)
s2.ev <- rbind(s2.edu[[1]]$qi$ev, s2.edu[[2]]$qi$ev, s2.edu[[3]]$qi$ev, s2.edu[[4]]$qi$ev, s2.edu[[5]]$qi$ev)

pdf(file = "eduEV.pdf", width = 15, height = 5)
par(mfrow = c(1,3))
plot(density(s0.ev), main = "Expect Value of CDF | EDU = 0", xlab = "")
plot(density(s1.ev), main = "Expect Value of CDF | EDU = 1", xlab = "")
plot(density(s2.ev), main = "Expect Value of CDF | EDU = 2", xlab = "")
dev.off()


###########6. QI's for future RUF comparison
############################################
x0.mdata <- list()
x1.mdata <- list()
s.edu.cdf <- list()
for (i in 1:5) {
	x0.mdata[[i]] <- setx(z.mdata[[i]], EDUCATION = 0)
	x1.mdata[[i]] <- setx(z.mdata[[i]], EDUCATION = 2)
	s.edu.cdf[[i]] <- sim(z.mdata[[i]], x = x0.mdata[[i]], x1 = x1.mdata[[i]])
	}

##MUD as treatment variable
###########################

match.mm <- cem(treatment = "MUD", datalist = a.cdf.mm, drop = c("CDF", "age2sq", "CDFsafer", "EXC_SURV_LOC", "pWEIGHT"), cutpoints = list(age2 = agecut, INF_MORT = infmcut, pctvillaccessfootboat = pctvcut), eval.imbalance = FALSE, verbose=3)

#matched data with cem weights
mdata <- list()
for (i in 1:5) {
	tdata <- a.cdf.mm[[i]]
	tdata <- tdata[match.mm[[i]]$matched,]
	cemweight <- match.mm[[i]]$w[match.mm[[i]]$matched]
	mdata[[i]] <- cbind(tdata, cemweight)
	}

#re-weight matched data
mdata.n0 <- c()
mdata.n1 <- c()
for (i in 1:5) {
	tdata <- mdata[[i]]
	mdata.n0[i] <- nrow(tdata[tdata$CDF==0,])
	mdata.n1[i] <- nrow(tdata[tdata$CDF==1,])
	}

rw0 <- 98.5 * mdata.n1 / mdata.n0 #new weights

for (i in 1:5){
	tdata <- mdata[[i]]
	tweight <- ifelse(tdata$CDF==1, 1, rw0[i])
	tweight <- tweight*tdata$cemweight #cem weight times proper ratio of CDF=0/total or CDF=1/total
	mdata[[i]] <- cbind(tdata, tweight)
	}

#fit models
z.mdata <- list()
for (i in 1:5){
	z.mdata[[i]] <- zelig(CDF ~ MUD + pctvillaccessfootboat + age2 + age2sq + INF_MORT, weights = mdata[[i]]$tweight, model = "logit.survey", data = mdata[[i]])
	}

#calculate combined estimates and SE
estMat <- matrix(data = NA, nrow = 5, ncol = 6)
varMat <- matrix(data = NA, nrow = 5, ncol = 6)
for (i in 1:5){
	for (j in 1:ncol(estMat)) {
		estMat[i,j] <- z.mdata[[i]]$coef[j] 
		}
	vc <- vcov(z.mdata[[i]])
	varMat[i,] <- diag(vc)
	}

estVec.cdf.mud <- apply(X = estMat, MARGIN = 2, FUN = mean)
seVec.cdf.mud <- c()
for (i in 1:ncol(varMat)) {
	seVec.cdf.mud[i] <- mean(varMat[,i]) + var(estMat[,i])*(1 + 1/5)
	}
seVec.cdf.mud <- sqrt(seVec.cdf.mud)

estVec.cdf.mud
seVec.cdf.mud


#simulate qi's
x0.mdata <- list()
x1.mdata <- list()
s.mud.cdf <- list()
for (i in 1:5) {
	x0.mdata[[i]] <- setx(z.mdata[[i]], MUD = 0)
	x1.mdata[[i]] <- setx(z.mdata[[i]], MUD = 1)
	s.mud.cdf[[i]] <- sim(z.mdata[[i]], x = x0.mdata[[i]], x1 = x1.mdata[[i]])
	}

rr.mud.cdf <- c()
for (i in 1:5) {
	rr.mud.cdf[i] <- mean(s.mud.cdf[[i]]$qi$rr)
	}
###########save above qi's for later comparison section
#######################################################


########7. Same Procedure, RUF MODEL
####################################

##replication of RUF LOGIT MODEL
RUF_VARS <- c("RUF2", "MUD", "EDUCATION", "SLPP", "Mende", "NOPARTY", "RUFMONEY", "RUFsafer", "RUFFRIENDS", "pctvillaccessfootboat", "FARMER", "STUDENT", "gender", "age2", "age2sq", "FREETOWN", "INF_MORT", "pWEIGHT", "EXC_SURV_LOC", "reason_abducted", "EXCOM")

wfRUF <- whofights[!is.na(whofights$RUF2),]
wfRUF <- na.omit(wfRUF[,RUF_VARS])
wfRUF <- wfRUF[order(-wfRUF$EXC_SURV_LOC),]

zRUF <- zelig(RUF2 ~ MUD + EDUCATION + SLPP + Mende + NOPARTY + RUFMONEY + RUFsafer + RUFFRIENDS + pctvillaccessfootboat + FARMER + STUDENT + gender + age2 + age2sq + FREETOWN + INF_MORT, model = "logit.survey", weights = wfRUF$pWEIGHT, ids = wfRUF$EXC_SURV_LOC, data = wfRUF)

##multiple imputation
MRUF_VARS <- c("RUF2", "M_RUF", "MUD", "EDUCATION", "SLPP", "Mende", "NOPARTY", "RUFMONEY", "RUFsafer", "RUFFRIENDS", "pctvillaccessfootboat", "FARMER", "STUDENT", "gender", "age2", "age2sq", "FREETOWN", "INF_MORT", "pWEIGHT", "EXC_SURV_LOC")

rufvol <- whofights[!is.na(whofights$M_RUF),]
rufvol <- rufvol[,MRUF_VARS]
rufvol <- rufvol[!is.na(rufvol$EXC_SURV_LOC),]
rufvol$EDUCATION = rufvol$EDUCATION + 1

set.seed(123456)

a.ruf.out <- amelia(x = rufvol, m = 5, p2s = 1, noms = c("RUF2", "M_RUF", "MUD", "SLPP", "Mende", "NOPARTY", "RUFFRIENDS", "FARMER", "STUDENT", "gender", "FREETOWN", "RUFMONEY"), ords = c("pctvillaccessfootboat", "RUFsafer", "EDUCATION"))

a.ruf.data <- a.ruf.out$imputations[1:5]

#set edu back to 0,1,2
for (i in 1:5) {
	tdata <- a.ruf.data[[i]]
	tdata$EDUCATION <- tdata$EDUCATION-1
	a.ruf.data[[i]] <- tdata
	}

#########a.ruf.data is full, imputed dataset

#compare with replication
z.ruf <- zelig(RUF2 ~ MUD + EDUCATION + SLPP + Mende + NOPARTY + RUFMONEY + RUFsafer + RUFFRIENDS + pctvillaccessfootboat + FARMER + STUDENT + gender + age2 + age2sq + FREETOWN + INF_MORT, model = "logit.survey", weights = a.ruf.data[[1]]$pWEIGHT, ids = a.ruf.data[[1]]$EXC_SURV_LOC, data = mi(a.ruf.data[[1]], a.ruf.data[[2]], a.ruf.data[[3]], a.ruf.data[[4]], a.ruf.data[[5]]))

summary(z.ruf)

#drop women (only 6 out of 69 volunteers joined RUF)
a.ruf.men <- list()
for (i in 1:5) {
	tdata <- a.ruf.data[[i]]
	men <- tdata$gender==1
	tdata.men <- tdata[men,]
	tdata.men$gender <- NULL
	a.ruf.men[[i]] <- tdata.men
	}

#drop money==1
a.ruf.mm <- list()
for (i in 1:5) {
	tdata <- a.ruf.men[[i]]
	money <- tdata$RUFMONEY==0
	tdata.money <- tdata[money,]
	tdata.money$RUFMONEY <- NULL
	a.ruf.mm[[i]] <- tdata.money
	}

#calculate ratio of ex-coms/total
w0 <- c()
w1 <- c()
for (i in 1:5) {
	tdata <- a.ruf.mm[[i]]
	w0[i] <- sum(tdata[tdata$RUF2==0, "pWEIGHT"])
	w1[i] <- sum(tdata[tdata$RUF2==1, "pWEIGHT"])
	}

w0/(w0 + w1) #~99%

#volunteers only and drop M_RUF (drop abductee observations)
a.rufvol <- list()
for (i in 1:5) {
	tdata <- a.ruf.data[[i]]
	tdata <- tdata[!(tdata$M_RUF==1),]
	tdata$M_RUF <- NULL
	a.rufvol[[i]] <- tdata
	}


agecut <- c(18, 21, 25, 30, 35, 40, 45, 50, 60, 70, 80)
infmcut <- c(0.17, 0.18, 0.19, 0.21, 0.24, 0.6)
pctvcut <- c(10, 20, 30, 40, 50, 60, 70, 101)

match.rufvol <- cem(treatment = "EDUCATION", datalist = a.rufvol, drop = c("RUF2", "age2sq", "RUFsafer", "EXC_SURV_LOC", "pWEIGHT", "age2", "pctvillaccessfootboat", "INF_MORT"), eval.imbalance = FALSE, verbose=3)

mdata <- list()
for (i in 1:5) {
	tdata <- a.rufvol[[i]]
	tdata <- tdata[match.rufvol[[i]]$matched,]
	cemweight <- match.rufvol[[i]]$w[match.rufvol[[i]]$matched]
	mdata[[i]] <- cbind(tdata, cemweight)
	}

#lose all FREETOWN = 1 observations; RUFFRIENDS = 1; RUFMONEY = 1; students; =/

#re-weight matched data
mdata.n0 <- c()
mdata.n1 <- c()
for (i in 1:5) {
	tdata <- mdata[[i]]
	mdata.n0[i] <- nrow(tdata[tdata$RUF2==0,])
	mdata.n1[i] <- nrow(tdata[tdata$RUF2==1,])
	}

rw0 <- 99 * mdata.n1 / mdata.n0 #new weights

for (i in 1:5){
	tdata <- mdata[[i]]
	tweight <- ifelse(tdata$RUF2==1, 1, rw0[i])
	tweight <- tweight*tdata$cemweight #cem weight times proper ratio of RUF=0/total or RUF=1/total
	mdata[[i]] <- cbind(tdata, tweight)
	}

#fit models
z.mdata <- list()
for (i in 1:5){
	z.mdata[[i]] <- zelig(RUF2 ~ EDUCATION + pctvillaccessfootboat + age2 + age2sq + INF_MORT, weights = mdata[[i]]$tweight, model = "logit.survey", data = mdata[[i]])
	}

#calculate combined estimates and SE
estMat <- matrix(data = NA, nrow = 5, ncol = 6)
varMat <- matrix(data = NA, nrow = 5, ncol = 6)
for (i in 1:5){
	for (j in 1:ncol(estMat)) {
		estMat[i,j] <- z.mdata[[i]]$coef[j] 
		}
	vc <- vcov(z.mdata[[i]])
	varMat[i,] <- diag(vc)
	}

estVec.ruf.edu <- apply(X = estMat, MARGIN = 2, FUN = mean)
seVec.ruf.edu <- c()
for (i in 1:ncol(varMat)) {
	seVec.ruf.edu[i] <- mean(varMat[,i]) + var(estMat[,i])*(1 + 1/5)
	}
seVec.ruf.edu <- sqrt(seVec.ruf.edu)

estVec.ruf.edu
seVec.ruf.edu

#simulate qi's
x0.mdata <- list()
x1.mdata <- list()
s.edu.ruf <- list()
for (i in 1:5) {
	x0.mdata[[i]] <- setx(z.mdata[[i]], EDUCATION = 0)
	x1.mdata[[i]] <- setx(z.mdata[[i]], EDUCATION = 2)
	s.edu.ruf[[i]] <- sim(z.mdata[[i]], x = x0.mdata[[i]], x1 = x1.mdata[[i]])
	}

match.rufvol <- cem(treatment = "MUD", datalist = a.rufvol, drop = c("RUF2", "age2sq", "RUFsafer", "EXC_SURV_LOC", "pWEIGHT", "age2", "pctvillaccessfootboat", "INF_MORT"), eval.imbalance = FALSE, verbose=3)

mdata <- list()
for (i in 1:5) {
	tdata <- a.rufvol[[i]]
	tdata <- tdata[match.rufvol[[i]]$matched,]
	cemweight <- match.rufvol[[i]]$w[match.rufvol[[i]]$matched]
	mdata[[i]] <- cbind(tdata, cemweight)
	}

#lose most RUFFRIENDS = 1 observations; RUFMONEY = 1; farmers

#re-weight matched data
mdata.n0 <- c()
mdata.n1 <- c()
for (i in 1:5) {
	tdata <- mdata[[i]]
	mdata.n0[i] <- nrow(tdata[tdata$RUF2==0,])
	mdata.n1[i] <- nrow(tdata[tdata$RUF2==1,])
	}

rw0 <- 99 * mdata.n1 / mdata.n0 #new weights

for (i in 1:5){
	tdata <- mdata[[i]]
	tweight <- ifelse(tdata$RUF2==1, 1, rw0[i])
	tweight <- tweight*tdata$cemweight #cem weight times proper ratio of RUF=0/total or RUF=1/total
	mdata[[i]] <- cbind(tdata, tweight)
	}

#fit models
z.mdata <- list()
for (i in 1:5){
	z.mdata[[i]] <- zelig(RUF2 ~ MUD + pctvillaccessfootboat + age2 + age2sq + INF_MORT, weights = mdata[[i]]$tweight, model = "logit.survey", data = mdata[[i]])
	}

#calculate combined estimates and SE
estMat <- matrix(data = NA, nrow = 5, ncol = 6)
varMat <- matrix(data = NA, nrow = 5, ncol = 6)
for (i in 1:5){
	for (j in 1:ncol(estMat)) {
		estMat[i,j] <- z.mdata[[i]]$coef[j] 
		}
	vc <- vcov(z.mdata[[i]])
	varMat[i,] <- diag(vc)
	}

estVec.ruf.mud <- apply(X = estMat, MARGIN = 2, FUN = mean)
seVec.ruf.mud <- c()
for (i in 1:ncol(varMat)) {
	seVec.ruf.mud[i] <- mean(varMat[,i]) + var(estMat[,i])*(1 + 1/5)
	}
seVec.ruf.mud <- sqrt(seVec.ruf.mud)

estVec.ruf.mud
seVec.ruf.mud

#simulate qi's
x0.mdata <- list()
x1.mdata <- list()
s.mud.ruf <- list()
for (i in 1:5) {
	x0.mdata[[i]] <- setx(z.mdata[[i]], MUD = 0)
	x1.mdata[[i]] <- setx(z.mdata[[i]], MUD = 1)
	s.mud.ruf[[i]] <- sim(z.mdata[[i]], x = x0.mdata[[i]], x1 = x1.mdata[[i]])
	}

rr.mud.ruf <- c()
fd.mud.ruf <- c()
for (i in 1:5) {
	rr.mud.ruf[i] <- mean(s.mud.ruf[[i]]$qi$rr)
	fd.mud.ruf[i] <- mean(s.mud.ruf[[i]]$qi$fd)
	}

##############begin RUF/CDF comparison combination
##################################################


pdf(file = "MUD.pdf", width = 8, height = 5)
par(mfrow = c(1,2))
plot(density(s.mud.cdf[[1]]$qi$rr), ylim = c(0, 1.2), xlim = c(0,20), xlab = "P(Y = 1|MUD = 1)/P(Y = 1|MUD = 0)", main = "")
lines(density(s.mud.cdf[[2]]$qi$rr))
lines(density(s.mud.cdf[[3]]$qi$rr))
lines(density(s.mud.cdf[[4]]$qi$rr))
lines(density(s.mud.cdf[[5]]$qi$rr))
lines(density(s.mud.ruf[[1]]$qi$rr), lty = 2)
lines(density(s.mud.ruf[[2]]$qi$rr), lty = 2)
lines(density(s.mud.ruf[[3]]$qi$rr), lty = 2)
lines(density(s.mud.ruf[[4]]$qi$rr), lty = 2)
lines(density(s.mud.ruf[[5]]$qi$rr), lty = 2)
legend(x = "topright", legend = c("CDF", "RUF"), lty = c(1,2))
plot(density(s.mud.cdf[[1]]$qi$fd), ylim = c(0,100), main = "", xlab = "E(Y|MUD = 1) - E(Y|MUD = 0)")
lines(density(s.mud.cdf[[2]]$qi$fd))
lines(density(s.mud.cdf[[3]]$qi$fd))
lines(density(s.mud.cdf[[4]]$qi$fd))
lines(density(s.mud.cdf[[5]]$qi$fd))
lines(density(s.mud.ruf[[1]]$qi$fd), lty = 2)
lines(density(s.mud.ruf[[2]]$qi$fd), lty = 2)
lines(density(s.mud.ruf[[3]]$qi$fd), lty = 2)
lines(density(s.mud.ruf[[4]]$qi$fd), lty = 2)
lines(density(s.mud.ruf[[5]]$qi$fd), lty = 2)
legend(x = "topleft", legend = c("CDF", "RUF"), lty = c(1,2))
dev.off()

pdf(file = "EDU.pdf", width = 8, height = 8)
par(mfrow = c(2,2))
plot(density(s.edu.cdf[[1]]$qi$rr), xlim = c(0,4000), xlab = "P(CDF = 1|EDU = 2)/P(CDF = 1|EDU = 0)", main = "")
lines(density(s.edu.cdf[[2]]$qi$rr))
lines(density(s.edu.cdf[[3]]$qi$rr))
lines(density(s.edu.cdf[[4]]$qi$rr))
lines(density(s.edu.cdf[[5]]$qi$rr))
plot(density(s.edu.cdf[[1]]$qi$fd), main = "", ylim = c(0,2.5), xlab = "E(CDF|EDU = 2) - E(CDF|EDU = 0)")
lines(density(s.edu.cdf[[2]]$qi$fd))
lines(density(s.edu.cdf[[3]]$qi$fd))
lines(density(s.edu.cdf[[4]]$qi$fd))
lines(density(s.edu.cdf[[5]]$qi$fd))
plot(density(s.edu.ruf[[1]]$qi$rr), ylim = c(0,.12), xlab = "P(RUF = 1|EDU = 2)/P(RUF = 1|EDU = 0)", main = "")
lines(density(s.edu.ruf[[2]]$qi$rr))
lines(density(s.edu.ruf[[3]]$qi$rr))
lines(density(s.edu.ruf[[4]]$qi$rr))
lines(density(s.edu.ruf[[5]]$qi$rr))
plot(density(s.edu.ruf[[1]]$qi$fd), main = "", ylim = c(0, 60), xlab = "E(RUF|EDU = 2) - E(RUF|EDU = 0)")
lines(density(s.edu.ruf[[2]]$qi$fd))
lines(density(s.edu.ruf[[3]]$qi$fd))
lines(density(s.edu.ruf[[4]]$qi$fd))
lines(density(s.edu.ruf[[5]]$qi$fd))
dev.off()

#######all done
###############

